* Start from an empty session and switch off output paging
clear all
set more off

* Folder globals used below: $data is the working directory, $figures receives the log
global data    "R:\SharedProjects\Shared2020-070\2016\extend_to_2020\JPE_Replication_dta"
global figures "R:\SharedProjects\Shared2020-070\2016\extend_to_2020\JPE_Replication_log"

* Close any log left open by an earlier run, then open a fresh plain-text log
capture log close
log using "$figures\B_file", replace text

* Relative file names used below (temp_w2, tempp, f831_and_inc) resolve inside $data
cd "$data"

*** Step 1: retrieve labor income data
*** First source: W2 records (detailed earnings file, stored in long format)

use "R:\Restricted\SSA Administrative Data\Respondent\DetEarn\stata\xyrdeternscr.dta", clear
rename *, lower

* Person identifier = hhid and pn concatenated, then converted to numeric
egen hhidpn = concat(hhid pn)
destring hhidpn hhid pn year type source, replace

* A person may have several records in the same year: add up w2box1 within
* person-year and store the result directly under its final name
gcollapse (sum) total_w2box1 = w2box1, by(hhidpn year)

* Save sorted on the merge key; the file is merged into the summary earnings below
sort hhidpn year
save temp_w2, replace


*** Second source: summary earnings file (one earn* column per year)
use "R:\Restricted\SSA Administrative Data\Respondent\SumEarn\stata\xyrsumern.dta" ,clear
rename *, lower
egen hhidpn=concat(hhid pn)

* Go from one row per person to one row per person-year
keep earn* hhidpn source
reshape long earn,i(hhidpn) j(year)

destring hhidpn year earn source, replace

* Attach the W2 totals. Both sides hold one row per (hhidpn, year): the master
* comes out of -reshape long- and the using file out of a collapse by hhidpn year.
* -merge 1:1- replaces the deprecated old-style -merge varlist using- syntax and
* stops with an error if that uniqueness does not hold instead of mis-matching silently.
sort hhidpn year
merge 1:1 hhidpn year using temp_w2
erase temp_w2.dta
drop if _merge==2					/*W2 person-years with no summary-earnings row*/
drop _merge

* Recode missing earnings. Note that earn==. matches only the system missing value,
* so the .n code is still present when the second line runs.
replace earn=0 if earn==.
replace earn=25 if earn==.n						/*.n is a code for earnings b/w 0 and $50*/
replace total_w2box1=0 if total_w2box1==.		/*no W2 record merged in for this person-year*/

* Yearly indicators for non-zero earnings:
*   ls_ssa   - non-zero summary (SSA) earnings
*   ls_ssaw2 - non-zero summary (SSA) earnings or non-zero W2 earnings
* NOTE(review): earn!=0 is also true for any missing code still left in earn - confirm none remain
gen ls_ssa   = earn!=0
gen ls_ssaw2 = earn!=0 | total_w2box1!=0

* Running count of such years within person, in calendar order
sort hhidpn year, stable
by hhidpn (year): gen experience 		= sum(ls_ssa)
by hhidpn (year): gen experience_alt 	= sum(ls_ssaw2)

* Fix: the second label used to be applied to -experience- again, overwriting the
* first label and leaving -experience_alt- without one
label var experience     "Years of non-zero SSA earnings"
label var experience_alt "Years of non-zero SSA earnings/W2 earnings"
drop ls*

* The panel is already complete ("balanced") and zero earnings are coded as 0,
* so restricting the window to 1978-2020 leaves the same set of years for everyone
keep if year>=1978 & year<=2020

rename (source total_w2box1) (source_ssaearn w2earn)
keep hhidpn year earn w2earn experience* source_ssaearn

* Store the list of year-varying variables in $ll; the same list is used again
* further down when the data are reshaped back to long
ds hhidpn year source_ssaearn, not
global ll `r(varlist)'

* One row per person, one column per variable-year
reshape wide $ll, i(hhidpn) j(year)
save tempp, replace


*************************************************************************************************************************
*************************************************************************************************************************
*** Step 2: prepare F831 data and merge in SSA data
* F831 data: 1 record by individual-application, where an appeal to an existing application is a new record; ends in 2020
* In round 2 application data extended to 2020, and also pulls in source permits =2020
*************************************************************************************************************************
*************************************************************************************************************************

// keep every round of every appl cycle

use "R:\Restricted\SSA Administrative Data\Other\F831\stata\f831.dta", clear

* Variable names are upper-cased while reject_step is built and lower-cased again afterwards
rename *, upper

* reject_step: 6 when RDT==1; otherwise assigned from the RB code.
* The RB code sets below do not overlap, so the order of the statements does not matter.
gen     reject_step = 6 if RDT==1
replace reject_step = 1 if RDT!=1 & inlist(RB,33,97,98)
replace reject_step = 2 if RDT!=1 & (inlist(RB,30,34,35,40,41,51) | inrange(RB,44,46) | inrange(RB,75,80))
replace reject_step = 4 if RDT!=1 & (inlist(RB,31,42) | inrange(RB,83,84))
replace reject_step = 5 if RDT!=1 & inlist(RB,32,43,81,82,85,86)
replace reject_step = 0 if RDT!=1 & (RB==999 | RB==. | inrange(RB,36,39) | inrange(RB,61,73) | inrange(RB,87,96) | inrange(RB,99,102))

rename *, lower

label define reject_step 0 "Deny,unknown/insuff.evid." 1 "Deny, engaging in SGA" 2 "Deny, Medical" 4 "Deny, Vocational (prev.work)" 5 "Deny, Vocational (oth.work)" 6 "Award"
label values reject_step reject_step

* Numeric person identifier built from hhid and pn, displayed without decimals
egen hhidpn = concat(hhid pn)
destring hhidpn, replace
format hhidpn %10.0f

* Application is for worker DI or SSI disability - drop applications for child/spouse, etc.
keep if (rid==2 & inlist(toc,1,2,11)) | (rid==16 & inlist(toc,21,24))

label define di_ssi 2 "DI" 16 "SSI"
label values rid di_ssi

* Variables carried forward
drop lit* ln* qd*
keep hhidpn seq source bic al rid fld_y fld_m toc ccf dodec_y dodec_m strdate_y strdate_m ///
	rdt dob_y dob_m bs rpdx oy occ ind ed pd pdi pdd_y pdd_m reject_step rsdx spc eor cer rb vrn

rename (rpdx rsdx) (pdx sdx)

* Adjudicative level: drop appeals outside SSA (levels 3 to 6); every remaining level
* other than 1 (reopenings and remands) is treated as a reconsideration at SSA
drop if inrange(al,3,6)
replace al = 2 if al!=1

sort hhidpn fld_y fld_m rid al, stable

* Monthly dates, with the day set to the 1st
gen date_dec = mdy(dodec_m, 1, dodec_y)										/*Date of decision*/
gen date_app = mdy(fld_m, 1, fld_y) if fld_y < 2021							/*Date of application*/
gen date_state_receipt = mdy(strdate_m, 1, strdate_y) if strdate_y != 9999	/*State receipt date*/

* Use the state receipt date in place of the application date when the application
* date is missing or falls after the decision date, provided the state receipt date
* is itself non-missing and not later than the decision date
replace date_app = date_state_receipt ///
	if (date_app==. | date_app>date_dec) ///
	 & date_state_receipt!=. ///
	 & date_state_receipt<=date_dec

* Records that still have no usable application date are dropped
drop if date_app==. | date_app>date_dec

* Drop clone records: keep one record per combination of the key features below
sort hhidpn seq
duplicates drop hhidpn rid date_app date_dec al rdt pdx sdx rb bs, force

* Records can share person, program (rid), application date and adjudicative level
* while differing in decision date. Keep only those carrying the latest decision date
* within each such group, which stabilizes the sample.
egen max_dec = max(date_dec), by(hhidpn rid date_app al)

drop if date_dec != max_dec
drop max_dec								/*This drops remaining duplicates. There are no records with date_dec being the same*/

sort hhidpn rid date_app al, stable

*** Concurrent applications: people who apply for DI and SSI on the same date.
*** A DI/SSI pair is collapsed into one record when application date, decision date,
*** adjudicative level, pdx, sdx and the decision (rdt) all coincide.

* n = number of records sharing all of these features
egen n = total(hhidpn!=.), by(hhidpn date_app date_dec al pdx sdx rdt)
egen conc_appl = group(hhidpn date_app date_dec al pdx sdx rdt)

* Keep the original program code, then mark both records of a pair with code 216
gen rid_orig = rid
replace rid = 216 if n==2
label define di_ssi 2 "DI" 16 "SSI" 216 "DI and SSI",replace
label values rid di_ssi

* For a concurrent application with the same decision etc., keep only the DI record
drop if n==2 & rid_orig==16

drop n conc_appl

**** Some people file for DI and SSI on the same date and are decided on the same date,
**** but the decision is *not* the same across the two. Those records were not collapsed
**** above, so the application cycle is identified by person, application date and program.

* cycleid, renumbered so that each person's lowest cycle number is 1
egen cycleid = group(hhidpn date_app rid)
egen first_cycle = min(cycleid), by(hhidpn)
replace cycleid = cycleid - first_cycle + 1
drop first_cycle

* There are records with a reconsideration but no initial consideration. When a person
* has a single record for an application date and it is a reconsideration, assume it is
* actually a first-round consideration.
egen n = total(hhidpn!=.), by(hhidpn date_app)
replace al = 1 if al==2 & n==1
drop n

* Round within the cycle, and running record number within person
gen round = al
bysort hhidpn (date_app cycleid al): gen record = _n

* For an appeal of an initial consideration, the application date becomes the decision
* date of the previous round in the same cycle; the filing date is kept in date_app_orig
gen date_app_orig = date_app
bysort hhidpn date_app cycleid (al): replace date_app = date_dec[_n-1] if _n>1 & al>1 & al!=.


* Outcome of the round: success = 1 when the application is approved (rdt==1),
* 0 when rdt is 2, 3 or 4, and missing for any other rdt value
gen success = 1 if rdt==1
replace success = 0 if inlist(rdt,2,3,4)
lab var success "Awarded DI/SSI"

gen nosuccess = 1 - success
lab var nosuccess "Not awarded DI/SSI"

* Appeal here means a reconsideration at DDS level: 0 for level 1, 1 for levels above 1 and below 9
gen appeal = 0 if al==1
replace appeal = 1 if al>1 & al<9

lab var date_dec 	"Date of decision in given round of app cycle"
lab var date_app	"Date of application or appeal"
lab var appeal 		"Indicator for whether this round is appeal or not"
lab var cycleid 	"Indicator for application cycle (separate for SSI and DI)"
lab var round 		"Round in given application cycle"
lab var record 		"Overall round (i.e. 5 means 5th instance of application record (round))"

* Variables carried into the merged file
keep hhidpn source al rid rid_orig toc bs pdx sdx date_dec date_app date_app_orig ///
	appeal success nosuccess dob_y dob_m cycleid round record occ ind ///
	oy ed reject_step ccf spc eor cer rb vrn

*** Use PDX info to fill in some missing Body System codes.
*** Only records with bs>20 are touched; bs>20 is also true when bs is missing.
*** Statements are grouped by the body system code that is assigned.
replace bs = 1  if inlist(pdx,7150,7240,7280,7290) & bs>20
replace bs = 4  if inlist(pdx,4010,4140)           & bs>20
replace bs = 5  if pdx==5710                       & bs>20
replace bs = 6  if inlist(pdx,5990,6290)           & bs>20
replace bs = 7  if inlist(pdx,2840,2850)           & bs>20
replace bs = 8  if pdx==8790                       & bs>20
replace bs = 9  if pdx==2780                       & bs>20
replace bs = 11 if inlist(pdx,3880,3890,9330)      & bs>20
replace bs = 12 if pdx==2960                       & bs>20
replace bs = 20 if inlist(pdx,7910,7920,7930,7960) & bs>20


* Calendar years of the two event dates
gen year_dec = year(date_dec)		// year of decision
gen year_app = year(date_app)		// year of application (or appeal)

rename source source_f831
sort hhidpn date_dec, stable

*** Step 4: merge F831 and labor income data
* Many F831 records per person are matched to the single wide earnings row per person
merge m:1 hhidpn using tempp, generate(merge_inc)

* A person with at least one F831 record (F831 only, or matched) is an applicant
gen ever_applied = inlist(merge_inc,1,3)

* Back to long format, event-study style: one earnings panel per application round
reshape long $ll, i(hhidpn record) j(year2)
rename year2 year

* Event time relative to each of the two dates
gen year_to_apply = year - year_app		// 0 in the year of application (or appeal)
gen year_to_dec   = year - year_dec		// 0 in the year of decision

* The dataset now has panel earnings data for every person and application round

order hhidpn source_ssaearn source_f831 year
destring source_f831, replace

* People with no F831 record have no F831 permission value: impute it from
* source_ssaearn when that is >=2006
replace source_f831 = source_ssaearn if source_ssaearn>=2006 & source_f831==.

save f831_and_inc.dta, replace

* The wide earnings file is no longer needed
erase tempp.dta

*** In the saved dataset each "record" of the F831 file carries N_SSA rows.
*** It is "balanced": each F831 record has exactly 43 SSA years (1978 to 2020).


log close


